chr <- 21
chr_pvalues_grubert <- fst::read.fst("chr_pvalues_grubert_chr21.fst")
head(chr_pvalues_grubert)
## SNP gene beta tstat pvalue beta_se maf_sample
## 1 rs183048582 64684 -0.30947859 -1.1006867 0.2746493 0.2811687 0.07894737
## 2 rs183048582 64685 0.03130456 0.1101570 0.9125872 0.2841814 0.07894737
## 3 rs183048582 64686 -0.04461388 -0.1545630 0.8775924 0.2886453 0.07894737
## 4 rs183048582 64687 0.56033736 2.0317715 0.0458169 0.2757876 0.07894737
## 5 rs183048582 64688 0.09431013 0.3271629 0.7444810 0.2882666 0.07894737
## 6 rs183048582 64689 -0.03752318 -0.1300745 0.8968652 0.2884744 0.07894737
## svars maf_sample_bin svars_bin maf_bin_uneq
## 1 0.4017506 2 [0.364,0.410) [0.0724,0.0987)
## 2 0.4017506 2 [0.364,0.410) [0.0724,0.0987)
## 3 0.4017506 2 [0.364,0.410) [0.0724,0.0987)
## 4 0.4017506 2 [0.364,0.410) [0.0724,0.0987)
## 5 0.4017506 2 [0.364,0.410) [0.0724,0.0987)
## 6 0.4017506 2 [0.364,0.410) [0.0724,0.0987)
#histogram
quick_hist = function(values_vec, breaks=50, title = NULL, threshold_proportion = 10^(-4)) {
res = hist(values_vec, plot=FALSE, breaks=breaks)
proportion <- mean(values_vec <= threshold_proportion)
proportion <- formatC(proportion, format = "e", digits = 2)
dat = data.frame(xmin=head(res$breaks, -1L),
xmax=tail(res$breaks, -1L),
ymin=0.0,
ymax=res$counts)
ggplot(dat, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)) +
geom_rect(size=0.5, colour="grey30", fill="grey80") +
ggtitle(paste(title, "proportion", proportion)) +
ylab("pvalue count")
}
quick_hist(chr_pvalues_grubert$pvalue, breaks = 400)
chr_pvalues_grubert_split_maf_sample_bin <- chr_pvalues_grubert %>%
select(pvalue, maf_sample_bin, maf_sample) %>%
split(chr_pvalues_grubert$maf_sample_bin)
maf_sample_bin_hist <- lapply(chr_pvalues_grubert_split_maf_sample_bin, function(maf_sample_bin_i){
title <- unique(maf_sample_bin_i$maf_sample_bin)
title <- paste0(title, ", ")
quick_hist(maf_sample_bin_i$pvalue, breaks = 400, title = title)
})
for(maf_sample_bin_i in maf_sample_bin_hist){
print(maf_sample_bin_i)
}
proportions_maf_sample_bin <- sapply(chr_pvalues_grubert_split_maf_sample_bin, function(maf_sample_bin_i){
threshold_proportion = 10^(-4)
proportion <- mean(maf_sample_bin_i$pvalue <= threshold_proportion)
})
plot(names(chr_pvalues_grubert_split_maf_sample_bin), proportions_maf_sample_bin)
chr_pvalues_grubert_split_maf_sample_bin <- chr_pvalues_grubert %>%
select(pvalue, maf_bin_uneq, maf_sample) %>%
split(chr_pvalues_grubert$maf_bin_uneq)
maf_sample_bin_uneq_hist <- lapply(chr_pvalues_grubert_split_maf_sample_bin, function(maf_sample_bin_i){
title <- unique(maf_sample_bin_i$maf_bin_uneq)
title <- paste0(title, ", ")
quick_hist(maf_sample_bin_i$pvalue, breaks = 400, title = title)
})
for(maf_sample_bin_uneq_i in maf_sample_bin_uneq_hist){
print(maf_sample_bin_uneq_i)
}
#IHW
chr_pvalues_grubert_p_adjust <- p.adjust(chr_pvalues_grubert$pvalue, method = "BH")
sum(chr_pvalues_grubert_p_adjust <= 0.05)
## [1] 1543
ihw_maf_equal <- ihw(chr_pvalues_grubert$pvalue, chr_pvalues_grubert$maf_sample_bin, alpha = 0.05, lambdas = Inf, nfolds = 3)
rejections(ihw_maf_equal)
Gives “Error: cannot allocate vector of size 687.9 Mb”